#clean up
rm(list=ls())

#load libraries
library(akima)
library(geoR)
library(scatterplot3d)  
library(ggmap)
library(lattice)
library(foreign)
library(maps)
library(maptools)
library(spdep)
library(rgdal)
library(prevR)
library(car)
library(xtable)
library(rgdal)

#options
options(scipen=8)

#set working directory
#setwd('/Users/monogan/Documents/census2010/')
#setwd('/Users/monogan/Documents/BIGdata/census2010raw/')

###BLOCK DATA###
#SEX AND AGE#
#Accessed on 13 October 2015 from https://www.nhgis.org
#Codebook: nhgis0008_ds172_2010_block_codebook.txt
a<-read.csv('nhgis0008_ds172_2010_block.csv')
a$TOTALBLOCK<-a$H7X001
a$WHITE<-a$H7X002
a$BLACK<-a$H7X003
a$OTHER<-a$H7X004+a$H7X005+a$H7X006+a$H7X007+a$H7X008
#b<-subset(a,select=-c(YEAR,REGION,DIVISION,COUSUB,PLACE,RES_ONLYA,TRUSTA,AIANHH,CBSA,METDIV,CSA,NECTA,NECTADIV,CNECTA,UA,SDELM,SDSEC,SDUNI,SABINSA,H7X001,H7X002,H7X003,H7X004,H7X005,H7X006,H7X007,H7X008,H76001,H76002,H76026))
b<-subset(a,select=-c(YEAR,REGION,DIVISION,COUSUB,PLACE,RES_ONLYA,TRUSTA,AIANHH,AIANHHA,AITSCEA,ANRCA,CBSA,CBSAA,METDIV,METDIVA,CSA,CSAA,NECTA,NECTAA,NECTADIV,NECTADIVA,CNECTA,CNECTAA,UA,UAA,SUBMCDA,SDELM,SDELMA,SDSEC,SDSECA,SDUNI,SDUNIA,SABINSA,H7X001,H7X002,H7X003,H7X004,H7X005,H7X006,H7X007,H7X008,H76001,H76002,H76026))#some of these identifiers may be worth adding back

#HOMEOWNERSHIP#
#Accessed on 29 October 2015 from https://www.nhgis.org
#Codebook: nhgis0009_ds172_2010_block_codebook.txt
c<-read.csv('nhgis0009_ds172_2010_block.csv')
c$TOTHOUSEBLOCK<-c$IFM001
c$HOMEOWN<-c$IFM002+c$IFM003
c$RENT<-c$IFM004
d<-subset(c,select=c(GISJOIN,TOTHOUSEBLOCK,HOMEOWN,RENT))

#merge block data and prepare block group index
e<-merge(x=b,y=d,by="GISJOIN")
e$GISJOINbg<-substr(as.character(e$GISJOIN),1,nchar(as.character(e$GISJOIN))-3)


###BLOCK GROUP DATA###
#INCOME#
f<-read.csv('nhgis0010_ds176_20105_2010_blck_grp.csv')
f$GISJOINbg<-as.character(f$GISJOIN)
f$TOTALBG<-f$JPNE001
f$JPNE007008<-f$JPNE007+f$JPNE008
f$JPNE009010<-f$JPNE009+f$JPNE010
f$JPNE016017<-f$JPNE016+f$JPNE017
g<-subset(f,select=c(GISJOINbg,TOTALBG,JPNE002,JPNE003,JPNE004,JPNE005,JPNE006,JPNE007008,JPNE009010,JPNE011,JPNE012,JPNE013,JPNE014,JPNE015,JPNE016017))

#EDUCATION#
h<-read.csv('nhgis0012_ds176_20105_2010_blck_grp.csv')
h$GISJOINbg<-as.character(h$GISJOIN)
h$maleNOHS<-h$JN9E003+h$JN9E004+h$JN9E005+h$JN9E006+h$JN9E007+h$JN9E008+h$JN9E009+h$JN9E010
h$maleHS<-h$JN9E011
h$maleSOME<-h$JN9E012+h$JN9E013
h$male2year<-h$JN9E014
h$male4year<-h$JN9E015
h$maleGRAD<-h$JN9E016+h$JN9E017+h$JN9E018
h$femaleNOHS<-h$JN9E020+h$JN9E021+h$JN9E022+h$JN9E023+h$JN9E024+h$JN9E025+h$JN9E026+h$JN9E027
h$femaleHS<-h$JN9E028
h$femaleSOME<-h$JN9E029+h$JN9E030
h$female2year<-h$JN9E031
h$female4year<-h$JN9E032
h$femaleGRAD<-h$JN9E033+h$JN9E034+h$JN9E035
i<-subset(h,select=c(GISJOINbg,maleNOHS,maleHS,maleSOME,male2year,male4year,maleGRAD,femaleNOHS,femaleHS,femaleSOME,female2year,female4year,femaleGRAD))

#merge block group data together and with block data, then prepare tract index
j<-merge(x=g,y=i,by="GISJOINbg")
k<-merge(x=e,y=j,all.x=TRUE,by="GISJOINbg")
k$GISJOINtract<-substr(k$GISJOINbg,1,nchar(k$GISJOINbg)-1)

###TRACT DATA###
#EMPLOYMENT#
l<-read.csv('nhgis0013_ds177_20105_2010_tract.csv')
l$GISJOINtract<-as.character(l$GISJOIN)
l$TOTALTRACT<-l$J6QE001
l$m1619un<-l$J6QE008
l$m1619not<-l$J6QE009
l$m1619emp<-l$J6QE003-l$m1619un-l$m1619not
l$m2021un<-l$J6QE015
l$m2021not<-l$J6QE016
l$m2021emp<-l$J6QE010-l$m2021un-l$m2021not
l$m2224un<-l$J6QE022
l$m2224not<-l$J6QE023
l$m2224emp<-l$J6QE017-l$m2224un-l$m2224not
l$m2529un<-l$J6QE029
l$m2529not<-l$J6QE030
l$m2529emp<-l$J6QE024-l$m2529un-l$m2529not
l$m3034un<-l$J6QE036
l$m3034not<-l$J6QE037
l$m3034emp<-l$J6QE031-l$m3034un-l$m3034not
l$m3544un<-l$J6QE043
l$m3544not<-l$J6QE044
l$m3544emp<-l$J6QE038-l$m3544un-l$m3544not
l$m4554un<-l$J6QE050
l$m4554not<-l$J6QE051
l$m4554emp<-l$J6QE045-l$m4554un-l$m4554not
l$m5559un<-l$J6QE057
l$m5559not<-l$J6QE058
l$m5559emp<-l$J6QE052-l$m5559un-l$m5559not
l$m6061un<-l$J6QE064
l$m6061not<-l$J6QE065
l$m6061emp<-l$J6QE059-l$m6061un-l$m6061not
l$m6264un<-l$J6QE071
l$m6264not<-l$J6QE072
l$m6264emp<-l$J6QE066-l$m6264un-l$m6264not
l$m6569un<-l$J6QE076
l$m6569not<-l$J6QE077
l$m6569emp<-l$J6QE073-l$m6569un-l$m6569not
l$m7074un<-l$J6QE081
l$m7074not<-l$J6QE082
l$m7074emp<-l$J6QE078-l$m7074un-l$m7074not
l$m75UPun<-l$J6QE086
l$m75UPnot<-l$J6QE087
l$m75UPemp<-l$J6QE083-l$m75UPun-l$m75UPnot
l$f1619un<-l$J6QE094
l$f1619not<-l$J6QE095
l$f1619emp<-l$J6QE089-l$f1619un-l$f1619not
l$f2021un<-l$J6QE101
l$f2021not<-l$J6QE102
l$f2021emp<-l$J6QE096-l$f2021un-l$f2021not
l$f2224un<-l$J6QE108
l$f2224not<-l$J6QE109
l$f2224emp<-l$J6QE103-l$f2224un-l$f2224not
l$f2529un<-l$J6QE115
l$f2529not<-l$J6QE116
l$f2529emp<-l$J6QE110-l$f2529un-l$f2529not
l$f3034un<-l$J6QE122
l$f3034not<-l$J6QE123
l$f3034emp<-l$J6QE117-l$f3034un-l$f3034not
l$f3544un<-l$J6QE129
l$f3544not<-l$J6QE130
l$f3544emp<-l$J6QE124-l$f3544un-l$f3544not
l$f4554un<-l$J6QE136
l$f4554not<-l$J6QE137
l$f4554emp<-l$J6QE131-l$f4554un-l$f4554not
l$f5559un<-l$J6QE143
l$f5559not<-l$J6QE144
l$f5559emp<-l$J6QE138-l$f5559un-l$f5559not
l$f6061un<-l$J6QE150
l$f6061not<-l$J6QE151
l$f6061emp<-l$J6QE145-l$f6061un-l$f6061not
l$f6264un<-l$J6QE157
l$f6264not<-l$J6QE158
l$f6264emp<-l$J6QE152-l$f6264un-l$f6264not
l$f6569un<-l$J6QE162
l$f6569not<-l$J6QE163
l$f6569emp<-l$J6QE159-l$f6569un-l$f6569not
l$f7074un<-l$J6QE167
l$f7074not<-l$J6QE168
l$f7074emp<-l$J6QE164-l$f7074un-l$f7074not
l$f75UPun<-l$J6QE172
l$f75UPnot<-l$J6QE173
l$f75UPemp<-l$J6QE169-l$f75UPun-l$f75UPnot
m<-subset(l,select=c(GISJOINtract,TOTALTRACT,m1619un,m1619not,m1619emp,m2021un,m2021not,m2021emp,m2224un,m2224not,m2224emp,m2529un,m2529not,m2529emp,m3034un,m3034not,m3034emp,m3544un,m3544not,m3544emp,m4554un,m4554not,m4554emp,m5559un,m5559not,m5559emp,m6061un,m6061not,m6061emp,m6264un,m6264not,m6264emp,m6569un,m6569not,m6569emp,m7074un,m7074not,m7074emp,m75UPun,m75UPnot,m75UPemp,f1619un,f1619not,f1619emp,f2021un,f2021not,f2021emp,f2224un,f2224not,f2224emp,f2529un,f2529not,f2529emp,f3034un,f3034not,f3034emp,f3544un,f3544not,f3544emp,f4554un,f4554not,f4554emp,f5559un,f5559not,f5559emp,f6061un,f6061not,f6061emp,f6264un,f6264not,f6264emp,f6569un,f6569not,f6569emp,f7074un,f7074not,f7074emp,f75UPun,f75UPnot,f75UPemp))

#merge with lower levels of data and create state-county FIPS identifier
n<-merge(x=k,y=m,all.x=TRUE,by="GISJOINtract")
n$FIPS<-(n$STATEA*1000)+n$COUNTYA

###COUNTY DATA###
#RURALISM#
#USDA, 2013 Rural-Urban Continuum Codes
#Accessed 15 October 2015 from http://www.ers.usda.gov/data-products/rural-urban-continuum-codes.aspx#.UfCK11OE4xc
o<-read.csv("ruralurbancodes2013.csv")
o$rural<-o$RUCC_2013-1
o$COUNTYPOP10<-o$Population_2010
p<-subset(o,select=c(FIPS,rural,COUNTYPOP10))

#RELIGION#
#ARDA, U.S. Religion Census: Religious Congregations and Membership Study, 2010
#Accessed 15 October 2015 from http://www.thearda.com/Archive/Files/Descriptions/RCMSCY10.asp
q<-read.dta("religionCounty2010.DTA")
q[is.na(q)]<-0
q$FIPS<-q$fips
q$jewish<-q$cjudadh+q$ojudadh+q$rjudadh+q$rfrmadh
q$other<-q$POP2010-q$cathadh-q$ldsadh-q$orthadh-q$jewish-q$mslmadh-q$mprtadh-q$evanadh
q$other[q$other<0]<-0
r<-subset(q,select=c(FIPS,cathadh,ldsadh,orthadh,jewish,mslmadh,mprtadh,evanadh,other))

#merge county data together and with tract data
s<-merge(x=p,y=r,all.x=FALSE,all.y=TRUE,by="FIPS")
t<-merge(x=n,y=s,all.x=TRUE,by="FIPS")

###LOAD AND MERGE GEOGRAPHIC LOCATORS FOR BLOCKS###
geoBlock<-read.csv("geoBlock.csv")
u<-merge(x=t,y=geoBlock,all.x=TRUE,by=c("STATEA","COUNTYA","TRACTA","BLOCKA"))

###CREATE SUBSET OF POPULATED BLOCKS###
v<-subset(u,subset=TOTALBLOCK>0)

###MOVE ALASKA AND HAWAII###
v$blockEastTRUE<-v$blockEast
v$blockNorthTRUE<-v$blockNorth
v$blockEast[v$STATE=="Hawaii"]<-v$blockEastTRUE[v$STATE=="Hawaii"]-(-6009 + 2337)+75
v$blockNorth[v$STATE=="Hawaii"]<-v$blockNorthTRUE[v$STATE=="Hawaii"]-(217+348.6)+450 
v$blockEast[v$STATE=="Alaska"]<-v$blockEastTRUE[v$STATE=="Alaska"]-(-2287+ 2337)
v$blockNorth[v$STATE=="Alaska"]<-v$blockNorthTRUE[v$STATE=="Alaska"]-(3456+348.6)+500
plot(x=v$blockEast,y=v$blockNorth,pch=".")
points(x=v$blockEast[v$STATE=="Alaska"],y=v$blockNorth[v$STATE=="Alaska"],pch=".",col="red")
points(x=v$blockEast[v$STATE=="Hawaii"],y=v$blockNorth[v$STATE=="Hawaii"],pch=".",col="blue")

###WRITE OUT DATA###
write.csv(v,file="block2010.csv",row.names=FALSE)
write.csv(u,file="block2010uninhabit.csv",row.names=FALSE)


